
*******************************************************************************************
*                               Code for reproducing
*         "Switching a Face-to-Face Panel to Self-Administered Survey Modes: 
*    Experimental Evidence on Effects of Mode Assignment on Response and Selectivity"
*       By: Jette Schröder, Claudia Schmiedeberg, Josef Brüderl, Christiane Bozoyan
*                                  20. January 2025
*******************************************************************************************


**********************************************************************
*                         Do-File 01: 
* pulling the data from the pairfam database and constructing the variables
**********************************************************************


*******************************************************
*            Data version used
*   pairfam Data Release 14.2 (31-05-2024)
*   doi: 10.4232/pairfam.5678.14.2.0
*******************************************************



********************************************************************************
********************************************************************************
*****       Merging the data                                  ******************
*****    (W14 gross data + W12 + W13)                         ******************
********************************************************************************
********************************************************************************

version 18.0 // Version of Stata used for the paper
clear all
set maxvar 10000

* Define file paths where pairfam release14 (ZA5678_v14-2_pairfam) is stored.
* Only the root directory below has to be adapted to the local machine; the
* Stata sub-directory is derived from it so the two paths cannot get out of sync.
* The values are quoted so that they are stored exactly as written.
global path_pairfam_data       "C:\Daten\pairfam\release14\ZA5678_v14-2_pairfam"   // pairfam directory
global path_pairfam_data_stata "$path_pairfam_data\Data\Stata"                     // stata directory

 
* Define file path where generated data sets should be stored
global path_generated_data "C:\Daten\pairfam\temp"


* Load the gross data set ("Bruttoband"), restricted to wave 14 while reading.
* This defines our analysis sample: all those eligible for W14.
use if wave == 14 using "$path_pairfam_data\Paradata\anchor gross data sets\Stata\Paradata_Fieldwork_W1-W14.dta"


* Exclude the Step-up sample (N=638)
keep if sample != 4
* The "Bruttoband" contains 8064 cases before dropping the Step-up sample
* (as reported in the methods report, "Methodenbericht")
* and 7426 after (of these 490 are non-monotonic)


* Store the wave 14 gross sample; it is re-used for the merges that follow
save "$path_generated_data\W14_gross_sample.dta", replace


* Work inside the pairfam Stata directory so the anchor files can be addressed by name
cd "$path_pairfam_data_stata"


* For wave 13 and wave 12: stack the CAPI and CATI interviews into one data set.
* The indicator created by gen() is 0 for the CAPI file and 1 for the appended CATI file.
* Wave 13: N=7009 interviews (as expected based on technical paper 1),
*          of which 6687 also participated in wave 12.
* NOTE(review): the indicator is named w13mode for wave 12 as well (kept as in the
* original code) - confirm whether a wave-specific name was intended.
foreach w in 13 12 {
    use          "anchor`w'_capi", clear
    append using "anchor`w'_cati", gen(w13mode)
    save         "$path_generated_data\anchor`w'_complete", replace
}


* Merge the wave 14 gross sample with the interview data of an earlier wave:
*   - pass 1 (wave 13): monotonic cases,     i.e. cases with d0==0 are dropped
*   - pass 2 (wave 12): non-monotonic cases, i.e. cases with d0==1 are dropped
* Only cases found in both the gross sample and the interview data are kept.
*
* Findings for the wave 13 pass (tab int6 _merge):
*   73 cases with an interview in wave 13 are not part of the gross sample for wave 14.
*   N=67 of them said of their own accord at the end of the w13 interview (int6)
*   that they did not want to participate again.
*   The other cases probably refused later, but before fielding of wave 14.
foreach prev in 13 12 {
    local excluded = (`prev' == 12)      // 0 in the wave 13 pass, 1 in the wave 12 pass

    use "$path_generated_data\W14_gross_sample.dta", clear
    describe
    drop if d0 == `excluded'
    save "$path_generated_data\W14_gross_sample_plus_W`prev'.dta", replace

    merge 1:1 id using "$path_generated_data\anchor`prev'_complete"
    tab int6 _merge, m
    keep if _merge == 3
    save "$path_generated_data\W14_gross_sample_plus_W`prev'.dta", replace
}


* Create one data set including monotonic and non-monotonic cases.
* The wave 12 (non-monotonic) data are still in memory after the loop;
* d0control is 0 for these cases and 1 for the appended wave 13 (monotonic) cases.
append using "$path_generated_data\W14_gross_sample_plus_W13.dta", gen(d0control)
drop _merge
save "$path_generated_data\W14_gross_sample_plus_W13_W12.dta", replace


* Add the Big Five items (per3i*):
*   - measured in wave 10 for long-term respondents
*   - measured in wave 11 for the sample added in wave 11 (sample 3)
* Cases without a match in the respective wave are kept (items stay missing);
* cases that only exist in the wave 10/11 file are discarded.

* Long-term respondents: every sample except sample 3
use "$path_generated_data\W14_gross_sample_plus_W13_W12.dta", clear
drop if sample == 3
merge 1:1 id using "$path_pairfam_data_stata\anchor10", keepusing(per3i*)
tab _merge
keep if inlist(_merge, 1, 3)
save "$path_generated_data\W14_gross_sample_plus_W13_W12_BF1.dta", replace

* Wave 11 sample: sample 3 only
use "$path_generated_data\W14_gross_sample_plus_W13_W12.dta", clear
keep if sample == 3
merge 1:1 id using "$path_pairfam_data_stata\anchor11", keepusing(per3i*)
describe per3i*
keep if inlist(_merge, 1, 3)
save "$path_generated_data\W14_gross_sample_plus_W13_W12_BF2.dta", replace

* Put both parts back together (wave 11 sample in memory + long-term respondents)
append using "$path_generated_data\W14_gross_sample_plus_W13_W12_BF1.dta"

tab per3i1, m


* Overwrite the combined file with the version that includes the Big Five items
save "$path_generated_data\W14_gross_sample_plus_W13_W12.dta", replace


* Delete the intermediate data sets created above that are not needed anymore.
* Only "W14_gross_sample_plus_W13_W12.dta" remains in the directory for generated data.
* Each erase stops the do-file with an error if its file does not exist.
erase "$path_generated_data\W14_gross_sample_plus_W12.dta"
erase "$path_generated_data\W14_gross_sample_plus_W13.dta"
erase "$path_generated_data\anchor12_complete.dta"
erase "$path_generated_data\anchor13_complete.dta"
erase "$path_generated_data\W14_gross_sample.dta"
erase "$path_generated_data\W14_gross_sample_plus_W13_W12_BF1.dta"
erase "$path_generated_data\W14_gross_sample_plus_W13_W12_BF2.dta"




********************************************************************************
********************************************************************************
*****       Defining the variables used in the analyses       ******************
*****    (Variables not used in the paper are commented out)  ******************
********************************************************************************
********************************************************************************

* From here on, work in the directory of the generated data
cd "$path_generated_data"
use "W14_gross_sample_plus_W13_W12.dta", clear

* All respondents from W13, and soft-refusals (non-monotonic) from W13 got an invitation
tab wave d0     // these are the target cases for our experiment

* All pairfam samples included
fre sample


************************************************************************************
* Treatment: switch from interviewer- (CAPI) to self-administered mode (CAWI, PAPI)
************************************************************************************

* self is f2f_mode with the codes 0 and 1 swapped (any other value is carried over unchanged)
recode f2f_mode (0 = 1) (1 = 0), generate(self)
label define selflbl 0 "interviewer-administered" 1 "self-administered"
label values self selflbl
label variable self "Interview mode"

* Check the recoding against the source variable
tab self f2f_mode, m

fre self

********************************************************************************
* Outcome: 0=failure (no interview), 1=success (interview completed)
********************************************************************************

* This is the final result to the interview request (disposition code)
* fre ergfinal
fre erg_ges
tab erg_ges self

* Defining the outcome variable: interview completed (in the assigned mode)
*   1 = erg_ges is 35 (counted as completed in both modes), or
*       erg_ges is 34, 37 or 38 and the case was assigned to the self-administered mode
*   0 = everything else (no response/refused/incomplete interview)
* NOTE(review): open remark by the original authors - the label "online-interview"
* used for a code in the data set is not correct, as it is only applicable to the
* self-administered mode. Which code is meant is not stated; to be confirmed.
gen response = inlist(erg_ges, 35) | (inlist(erg_ges, 34, 37, 38) & self == 1)
label def responselbl 0 "no interview" 1 "interview completed"
label values response responselbl
label var response "Response"

/* Note 1:
Neutral non-response (respondent has moved to unknown location (N=47), respondent has died (N=2),
respondent not reached over 6 months (N=8)) is not coded as missing, but as failure.
This was done because the modes differ in the probability of detecting those neutral non-responses.
*/

/* Note 2:
Some respondents assigned to the interviewer-administered mode refused,
but they completed a PAPI (N=46), or a CAWI (N=32),
or were interviewed by telephone (N=6). These were recorded as failures.
These mode changes happened because the survey agency prioritised response over the experiment.
A mode change in the other direction (self-administered -> interviewer-administered) was not allowed.
*/

* Disposition codes by outcome, separately for each assigned mode
tab erg_ges response   if self==1, col    // proportion PAPI
tab erg_ges response   if self==0, col 

********************************************************************************
* Control variables
********************************************************************************

****** Design variables ************

* Cohort: code 4 is moved to 0, which is labelled "2001-03" below
replace cohort = 0 if cohort == 4
label define cohort 0 "2001-03" 1 "1991-93" 2 "1981-83" 3 "1971-73"
label values cohort cohort
tab age cohort

* Sample
label define sample 1 "pairfam main" 2 "DemoDiff" 3 "refreshment"
label values sample sample
label variable sample "Sample"
tab sample cohort
tab self sample, col chi2       // distribution of the treatment across samples



* fre intid                   // W13/W12 interviewer ID

  
****** Socio-demographics ******************

* Gender: 1 = female (sex_gen==2), 0 = otherwise.
* The "if !missing()" guard keeps a system-missing sex_gen from being coded as 0 (male);
* a logical expression such as sex_gen==2 evaluates to 0, not to missing, for missing input.
* Negative codes of sex_gen are set to missing as before.
gen     female = (sex_gen == 2) if !missing(sex_gen)   // gender
replace female = . if sex_gen < 0    //N=1
lab var female "Gender"
lab def femalelab 0"male" 1"female"
lab val female femalelab

* Currently student: 1 if yeduc==0, 0 otherwise; missing if yeduc is -7 or system-missing.
tab yeduc                                        // education
gen     inschool = (yeduc == 0) if !missing(yeduc)
lab var inschool "Currently student"
replace inschool=.  if yeduc==-7     //N=17
replace yeduc   =.  if yeduc==-7

* Education (ISCED), collapsed into fewer categories.
* Codes 1-8 are grouped and -7 is set to missing; all other values of isced
* (e.g. 0, labelled "enrolled" below) are carried over unchanged.
tab isced
recode isced (1 2 3 = 1) (4 5 = 2) (6 = 3) (7 8 = 4) (-7 = .), generate(iscedgrouped)
tab iscedgrouped if inschool ==0
label define iscedlbl 0"enrolled" 1 "<= lower secondary" 2"upper secondary" 3"post-secondary" 4"tertiary"
label values iscedgrouped iscedlbl
label variable iscedgrouped "Education (ISCED)"
tab isced iscedgrouped      // check grouped against original codes


* Labor force status, collapsed into seven categories.
* Every value of lfs that is not listed in a rule (including missing) becomes missing.
tab lfs inschool,m                               // labor force status
recode lfs (2 3       = 0)   /// homemaker (including parental leave)
           (10 11     = 1)   /// part-time
           (9         = 2)   /// full-time
           (12        = 3)   /// self-employed
           (4         = 4)   /// unemployed
           (1 8       = 5)   /// education, vocational training
           (5 6 7 13  = 6)   /// other
           (else      = .),  ///
           generate(emplstat)
label def emplstatlbl 0 "homemaker" 1 "part-time" 2 "full-time" 3"self-employed" 4 "unemployed"  5"education" 6 "other"
label values emplstat emplstatlbl
label var emplstat "Employment status"
tab lfs emplstat, m                   //N=8



******** Big Five **********************

* Codes -7 to -1 of the Big Five items are turned into missing values
mvdecode per3i*, mv(-7/-1)

* Reverse-code selected items (1<->5, 2<->4); the reversed copy gets the suffix _r
local reversed per3i1 per3i2 per3i8 per3i9 per3i11 per3i12 per3i17 per3i21
foreach item of local reversed {
    recode `item' (1=5) (2=4) (3=3) (4=2) (5=1), gen(`item'_r)
    lab def `item'_r 1 "1 sehr zutreffend" 5 "5 sehr unzutreffend"
    lab val `item'_r `item'_r
}

* Items and variable label of each scale
local items_neurot     per3i9_r  per3i4    per3i14   per3i19
local items_extrav     per3i1_r  per3i11_r per3i6    per3i16
local items_agreeable  per3i2_r  per3i12_r per3i17_r per3i7
local items_conscient  per3i8_r  per3i3    per3i13   per3i18
local items_openness   per3i21_r per3i5    per3i10   per3i15  per3i20

local title_neurot     "Neuroticism"
local title_extrav     "Extraversion"
local title_agreeable  "Agreeableness"
local title_conscient  "Conscientiousness"
local title_openness   "Openness"

* Scale score = row mean of the items, computed only if at least 3 items are non-missing
* (4 items per scale; openness has 5)
foreach scale in neurot extrav agreeable conscient openness {
    egen help = rownonmiss(`items_`scale'')
    egen `scale' = rowmean(`items_`scale'') if help >= 3
    drop help
    lab var `scale' "`title_`scale''"
    lab def `scale' 1 "1(low)" 5 "5(high)"
    lab val `scale' `scale'
}

* Indicator: 1 if at least one of the five scale scores is missing, 0 otherwise
gen mis = missing(neurot,extrav,agreeable,conscient,openness)
tab mis                        //N=229



* Reduce the data set to the variables used in the analyses and store the analysis file.
* The last line of the keep command ends with a plain // comment (not ///), so the
* command is terminated there and does not depend on a blank line following it.
keep response self erg_ges intid                           ///
    sample cohort                                          /// design variables
    female inschool iscedgrouped isced emplstat            /// socio-demographics
    neurot  extrav  agreeable  conscient  openness         // big five (1-5 scale)


save "W14_gross_sample_plus_W13_W12 - analysis file", replace



